//==============================================================================
// Project:		Wealth transfers and their economic effects
// File name:	Individual wealth
// Objective: 	Apportion household wealth items to individuals
//
//==============================================================================

// Start from an empty session and move to the project data directory.
clear

cd "XXXX"

// Load the combined person-level file. -use- takes -clear- (not -replace-,
// which is rejected as an invalid option and stops the do-file).
use combined_filtered_restricted.dta, clear

// Only waves 2, 6, 10, 14 and 18 are retained; per the original note these
// are the waves that have a wealth module.
keep if inlist(wave, 2, 6, 10, 14, 18)


***place of residence-related assets and debts

// Attach the household-level home-ownership weights created in
// 'HILDA regression weights.do' (many persons per household-wave record).
// keep(master match) discards household-waves that exist only in the weights
// file: they would otherwise be appended as rows without any person data and
// flow through every later step. _merge is still created and is dropped
// further down before the partner merge.
merge m:1 hhrhid wave using restricted_homeownership_weights.dta, keep(master match)

**age ranges for weights - these match the bins already established
// Lower bounds of the four-year age bins (15-18, 19-22, ..., 79-82); the last
// bin, starting at 83, is widened below to be open-ended.
local age_set 15 19 23 27 31 35 39 43 47 51 55 59 63 67 71 75 79 83

// List of age-indicator variable names, built up inside the loop.
local age

foreach x of local age_set{
	gen age_`x' = 0
	replace age_`x' = 1 if hgage >= `x' & hgage < `x' + 4
	local age `age' age_`x' // builds the list
}

// Make the top bin open-ended. Stata orders missing values above every
// number, so "hgage >= 83" alone would also capture missing ages and hide
// them from the check below; exclude them explicitly.
replace age_83 = 1 if hgage >= 83 & !missing(hgage)

// Bin for everyone younger than the first lower bound.
gen age_0 = 0
replace age_0 = 1 if hgage < 15

local age age_0 `age'

*check everyone accounted for once
// Any value other than 1 (including 0 for a missing age) signals a problem.
egen all_age = rowtotal(`age')
tab all_age, m //all equals 1 as expected

**gender for weights
// male is 1 when sex == 1; everyone else (including any uncoded value of
// sex) is assigned to female so that each person falls in exactly one group.
gen male = 0
replace male = 1 if sex == 1
gen female = 0
replace female = 1 if male == 0

**flag for if a person owns a share of the residential home
// own_home starts at 0 for everybody and is switched to 1 when, for some
// household slot 1-20, the slot's person id (hgxid#) is this person's own id
// and the matching hsoid# entry equals 1.
gen own_home = 0

forvalues slot = 1/20 {
	replace own_home = 1 if (hgxid`slot' == xwaveid & hsoid`slot' == 1)
}

**create lists
// gender: the two gender indicators; all_list: names of every age-by-gender
// cell indicator, accumulated as the variables are generated.
local gender male female
local all_list

foreach a of local age {
	foreach g of local gender {
		// cell indicator = 1 only when both the age bin and the gender apply
		gen `a'_`g' = `a'*`g'
		local all_list `all_list' `a'_`g'
	}
}

*check
// Number of cells each person falls into.
egen all_categories = rowtotal(`all_list')
tab all_categories, m //all equals 1 as expected

**calculate shares
// Accumulate indicator x household weight over every age-by-gender cell.
// With one active cell per person (see the check above), the result is the
// weight_HH_* value belonging to the person's own cell.
gen share_primary_residence = 0

foreach cell of local all_list {
	replace share_primary_residence = share_primary_residence + `cell'*weight_HH_`cell'
}

// People not flagged as owners receive no regression-based share.
replace share_primary_residence = 0 if own_home == 0

// Distribution of shares among flagged owners, missings shown.
tab share_primary_residence if own_home == 1, m // no missing values

**seems likely that partners are equal owners, so adjust partner ownership shares to be the average of the two partners

// Convert the partner id to numeric for use as a merge key below; it is
// missing where the person has no partner.
destring hhpxid, replace
codebook hhpxid // note the missing values for instances where there is no partner

// Remove the indicator left by the weights merge so that later merges can
// create their own.
drop _merge

// Snapshot of the working data; reloaded repeatedly in the wave loop below.
save temp_main.dta, replace

// Attach each person's partner's share, one wave at a time. This follows the
// partner-variable procedure supplied with the HILDA documentation, except
// that it has to be run wave by wave.
local wealth_waves 2 6 10 14 18

foreach w of local wealth_waves {
	// Data in memory at this point is the full temp_main file (either just
	// saved, or reloaded at the bottom of the previous pass).

	// Step 1: lookup file for this wave, keyed by the person's own id but
	// renamed so that it lines up with other people's partner id.
	keep if wave == `w'
	keep xwaveid share_primary_residence
	rename xwaveid hhpxid
	rename share_primary_residence share_primary_residence_partner
	save temp_partner`w'.dta, replace

	// Step 2: this wave's main records, with the partner's share merged on.
	use temp_main.dta, clear
	keep if wave == `w'
	merge m:1 hhpxid using temp_partner`w'.dta
	drop if _merge == 2 // lookup rows that are nobody's partner
	drop _merge
	save temp_main_`w'.dta, replace

	// Step 3: restore the full file for the next pass.
	use temp_main.dta, clear
}

// Stack the five wave files back into a single dataset.
use temp_main_2, clear

append using temp_main_6 temp_main_10 temp_main_14 temp_main_18

codebook share_primary_residence_partner

// Where a partner's share was found, replace the person's share with the
// mean of the two partners' shares.
replace share_primary_residence = (share_primary_residence + share_primary_residence_partner)/2 if share_primary_residence_partner != .

*overall check
// Household-wave total of the regression-based shares after partner averaging.
egen check1_5_1_1 = total(share_primary_residence), by(hhrhid wave)
tab check1_5_1_1 if check1_5_1_1 > 0 // all equal 1 as required

**Cases where someone who we know is an owner has been weighted to zero
// Restricted to stated owners (with missings shown) so that the zero row
// counts owners weighted to zero rather than every non-owner in the data.
tab share_primary_residence if own_home == 1, m // 11 people are owners who have been weighted to 0 - reallocate equally among stated owners in these households

// flag carries each stated owner's share; non-owners get a large sentinel so
// that they can never determine the household minimum.
gen flag = share_primary_residence
replace flag = 99999 if own_home == 0 // a sufficiently large number
egen min_share_primary_residence = min(flag), by(hhrhid wave) //create flag (instances of zero)
replace share_primary_residence = 0 if min_share_primary_residence == 0 //clear out regression weights in these instances

// Equal split among stated owners, used only in the households just cleared.
// 1/total_own_home is missing when a household has no stated owner, but every
// row of such a household has own_home == 0 and is zeroed on the next line.
egen total_own_home = total(own_home), by(hhrhid wave)
gen own_home_share = 1/total_own_home
replace own_home_share = 0 if own_home == 0
replace own_home_share = 0 if min_share_primary_residence > 0 //set this weight to zero when regression weight is OK

**Cases where there are assets but no owners are stated
tab hwhmvai if total_own_home == 0 //about 200 people live in HHs with assets but no owners are stated

**Seems safe to assume that houses are not owned by children under 15. What about dependent students or non-dependent children?
tab hgage if hhrih == 9 //all dependent students are under 24
tab own_home if hhrih == 9 //99.5% of dependent students don't own any of the family home.
tab hgage if hhrih == 10 //about 20 per cent of non-dependent children are over 30.
tab own_home if hhrih == 10 //96.5% of non-dependent children don't own any of the family home.

*Hence, will assume that remainder is split evenly among all who are not children under 15, dependent students or non-dependent children - hereafter, this is called 'standard rules'
// own_home_adj marks the people eligible under the standard rules
// (hhrih not 8, 9 or 10) in households with no stated owner.
gen own_home_adj = 0
replace own_home_adj = 1 if hhrih != 8 & hhrih !=  9 & hhrih !=  10 & total_own_home == 0
egen total_own_home_adj = total(own_home_adj), by(hhrhid wave)

// Equal split among the eligible people; ineligible rows (including all rows
// of households with a zero denominator) are set to 0.
gen own_home_adj_share = 1/total_own_home_adj
replace own_home_adj_share = 0 if own_home_adj == 0

**checks
// The three components are constructed to be mutually exclusive within a
// household, so their sum is the person's final share of the home.
gen primary_residence_share = share_primary_residence + own_home_share + own_home_adj_share
egen check1 = total(primary_residence_share), by(hhrhid wave) //share owned by household
tab check1 if total_own_home > 0,m //all 1 as expected

**create home-related asset and debt variables - assumes debt shared in proportion to ownership

gen pwhmvai = primary_residence_share * hwhmvai
gen pwhmdti = primary_residence_share * hwhmdti

***other property-related assets and debts

// own_property = 1 when the person's own id sits in household slot j and the
// matching opown entry equals 1.
gen own_property = 0

*note: the opown* suffix is always two digits (opown01 ... opown20), so the slot number is zero-padded
forvalues j = 1/20 {
	local jj : display %02.0f `j'
	replace own_property = 1 if (hgxid`j' == xwaveid & opown`jj' == 1)
}

tab own_property // 12% of people own other property
egen total_own_property = total(own_property), by(hhrhid wave) // number of property owners in the HH
tab total_own_property // up to 5 property owners in each HH

// Net rent per person and its magnitude (a loss counts like a gain).
codebook oirntip //nothing less than 0
codebook oirntin //nothing less than 0
gen rent = oirntip - oirntin
gen abs_rent = abs(rent)
codebook abs_rent // all positive as expected, no missing

tab abs_rent if own_property == 1 // 55% of people who owned rental property had a rental gain or loss

tab abs_rent if own_property == 0 // 98% of people not listed as property owners earned no rent. The rest are treated as non-owners: whatever property they hold was unknown to the person who filled out the HH form, so it will not have been captured


egen total_abs_rent = total(abs_rent), by(hhrhid wave)

tab total_abs_rent if total_own_property > 0 // 62% of rental property-owning HHs had a rental gain or loss


**allocation order: (1) in proportion to rental gain/loss where available, (2) equally among listed property owners otherwise, (3) standard rules for anything left (with the adjustment proposed above)

*(1) share for HHs that earn rent
replace abs_rent = 0 if own_property == 0 // adjustment as proposed above
drop total_abs_rent
egen total_abs_rent = total(abs_rent), by(hhrhid wave) // recomputed after the adjustment
gen abs_rent_share = cond(abs_rent == 0, 0, abs_rent/total_abs_rent)


*(2) share for HHs that don't earn rent but have an owner recorded
// Zeroing the owner count in rent-earning HHs switches this rule off there.
replace total_own_property = 0 if total_abs_rent > 0
gen own_property_share = 1/total_own_property
replace own_property_share = 0 if own_property == 0 | own_property_share == .


*(3) any remainder?
tab hwopvai if total_own_property == 0 & total_abs_rent == 0 // 99.9% accounted for - split remainder according to standard rules
gen property_adjustment_flag = 1 if !inlist(hhrih, 8, 9, 10) & total_own_property == 0 & total_abs_rent == 0
egen total_property_adjustment_flag = total(property_adjustment_flag), by(hhrhid wave)
gen property_adjustment_flag_share = 1/total_property_adjustment_flag
replace property_adjustment_flag_share = 0 if property_adjustment_flag == . | property_adjustment_flag_share == .

*combine
// Only one of the three components is non-zero within any household.
gen property_share = abs_rent_share + own_property_share + property_adjustment_flag_share

egen check1_5_1 = total(property_share), by(hhrhid wave)
tab check1_5_1,m //equals 1 as expected

**create other-property asset and debt variables - assumes debt shared in proportion to ownership

gen pwopvai = property_share * hwopvai
gen pwopdti = property_share * hwopdti


***children's bank accounts

codebook hgage // no missing values

// Flag children under 15 and count them per household-wave.
gen child_under_15 = 0
replace child_under_15 = 1 if hgage < 15 //flag for child under 15
tab child_under_15 //21% are children under 15
egen total_child_under_15 = total(child_under_15), by(hhrhid wave)
tab total_child_under_15 // up to 10 children in a household, 54% have no children under 15

tab hwcbani if total_child_under_15 == 0 //99.9% accounted for, remainder may be children on the cusp of age 15?
tab hgage if hwcbani > 0 & total_child_under_15 == 0 // Only one child aged 15. Apportion all money to them in that case and otherwise share the money equally among all HH members.

*apportion childrens' bank accounts equally among children
// 1/total is missing in households without children; those rows all have
// child_under_15 == 0 and are zeroed on the next line.
gen child_under_15_share = 1/total_child_under_15
replace child_under_15_share = 0 if child_under_15 == 0
tab child_under_15_share //21% are zero as expected, ranges from 0 to 1

*adjustment for all cases excepting the 'missed' 15 yo child
// Household size, obtained by summing a constant 1 over members. total() is
// used in place of the legacy sum() name, matching the rest of this file.
gen flag1 = 1
egen total_HH_members = total(flag1), by(hhrhid wave)
gen total_HH_members_share = 1/total_HH_members
replace child_under_15_share = total_HH_members_share if total_child_under_15 == 0 // shares funds equally among HH members in each case where there is no child under 15

*ad-hoc adjustment for the 'missed' 15 yo child
// Households with a positive children's balance, no child under 15, and a
// 15-year-old: the balance goes to the 15-year-old(s). Splitting by the number
// of flagged members keeps the household total at 1 even if more than one
// 15-year-old is flagged; with the single case found in the data this still
// gives that child a share of 1.
gen ad_hoc_flag = 1 if hwcbani > 0 & total_child_under_15 == 0 & hgage == 15
egen ad_hoc_HH = max(ad_hoc_flag), by(hhrhid wave)
egen total_ad_hoc_flag = total(ad_hoc_flag), by(hhrhid wave)
replace child_under_15_share = 0 if ad_hoc_HH == 1
replace child_under_15_share = 1/total_ad_hoc_flag if ad_hoc_flag == 1

*check
egen check3 = total(child_under_15_share), by(hhrhid wave)
tab check3,m // all 1 as expected

**create variables
gen pwcbani = child_under_15_share * hwcbani

***Cash investments
*household cash investments are apportioned using oiinti - interest of more than $100 from banks, other financial institutions, bonds, debentures, cash management trusts, family or other private trust funds, or from loans to other persons not in this household.

codebook oiinti // no missing values and nothing less than 0

// Diagnostics: how often is interest income observed in HHs holding cash investments?
// HH_interest_flag becomes 1 when the largest oiinti in the HH is positive, else 0.
egen HH_interest_flag = max(oiinti), by(hhrhid wave)
replace HH_interest_flag = (HH_interest_flag > 0)
gen HH_cash_investment_flag = 1 if hwcaini > 0
tab HH_interest_flag HH_cash_investment_flag, m
tab HH_interest_flag if HH_cash_investment_flag == 1 // interest income available as a flag in 81% of cases
egen total_interest = total(oiinti), by(hhrhid wave)
codebook total_interest if HH_cash_investment_flag == 1 // mean HH interest income of $6141
codebook total_interest if HH_cash_investment_flag == . // mean HH interest income of $1281

*rule: split by each person's share of HH interest; where the HH reports no interest, split evenly among people who are not children under 15, dependent students or non-dependent children (the standard rules)

// Primary rule; missing wherever the HH total is zero.
gen interest_share = oiinti/total_interest

*fallback for households reporting no interest
gen interest_adjustment_flag = 1 if !inlist(hhrih, 8, 9, 10) & total_interest == 0
egen total_interest_adjustment_flag = total(interest_adjustment_flag), by(hhrhid wave)
gen interest_adjustment_flag_share = cond(interest_adjustment_flag == ., 0, 1/total_interest_adjustment_flag)
replace interest_share = interest_adjustment_flag_share if total_interest == 0 // fallback overrides the missing primary share

egen check2 = total(interest_share), by(hhrhid wave)
tab check2,m // all 1 as expected

*create variable
gen pwcaini = interest_share * hwcaini

***equity investments
*two candidate keys: oifdiva (dividends from company shares, managed funds, or property trusts) and oidvryi (total dividends and royalties [imputed]). oifdiva is more directly related, but it is not imputed, which may be an issue

*is the lack of imputation of oifdiva an issue?
codebook oifdiva // no missing data
tab oifdiva if oifdiva < 0 // about 2000 refused/not stated/don't know cf. 13 000 people who received dividends

codebook oidvryi if oidvryi > 0 // 17 000 people received dividends

tab oifdiva if hweqini > 0 // about 5% of people in HHs with equity investments received dividends but refused/not stated/don't know

*how does this translate to HHs?
// A person is 'missing' when oifdiva is coded -4 or -3; the HH flag is 1 if anyone in it is.
gen missing_oifdiva = inlist(oifdiva, -4, -3)
egen HH_missing_oifdiva = max(missing_oifdiva), by(hhrhid wave)
tab HH_missing_oifdiva if hweqini > 0,m //missing variables a problem in about 10% of cases

*how much do oidvryi and oifdiva differ?
gen ratio = oidvryi/oifdiva if oidvryi > 0 & oifdiva > 0
tab ratio // equal to 1 for about 98% of the data, but the tail takes off very sharply after that - want to be careful with these

egen median_oidvryi_not_missing = median(oidvryi) if oifdiva > 0
egen median_oidvryi_missing = median(oidvryi) if inlist(oifdiva, -4, -3)
egen median_oifdiva = median(oifdiva) if oifdiva > 0

tab median_oidvryi_not_missing // $420
tab median_oidvryi_missing // $450
tab median_oifdiva // $400 - little relative difference in oidvryi between people who report their dividend amount and people who received one but refused/not stated/don't know

*conclusion: lack of imputation is not much of an issue, so allocate using oifdiva, then oidvryi, then standard rules.

// Tier 1: oifdiva shares. Negative codes are zeroed, and the HH total is
// disabled (set to 0) if anyone in the HH did not answer properly.
replace oifdiva = 0 if oifdiva < 0
egen total_oifdiva = total(oifdiva), by(hhrhid wave)
replace total_oifdiva = 0 if HH_missing_oifdiva == 1
gen oifdiva_share = oifdiva/total_oifdiva
replace oifdiva_share = 0 if oifdiva == 0 | oifdiva_share ==.

// Tier 2: oidvryi shares, disabled wherever tier 1 is usable.
replace oidvryi = 0 if oidvryi < 0
egen total_oidvryi = total(oidvryi), by (hhrhid wave)
replace total_oidvryi = 0 if total_oifdiva > 0
gen oidvryi_share = oidvryi/total_oidvryi
replace oidvryi_share = 0 if oidvryi == 0 | oidvryi_share ==.

// Tier 3: standard rules when neither dividend measure is usable.
gen equity_adjustment_flag = 1 if !inlist(hhrih, 8, 9, 10) & total_oifdiva == 0 & total_oidvryi == 0
egen total_equity_adjustment_flag = total(equity_adjustment_flag), by(hhrhid wave)
gen equity_adjustment_flag_share = 1/total_equity_adjustment_flag
replace equity_adjustment_flag_share = 0 if equity_adjustment_flag ==. | equity_adjustment_flag_share ==.

// Combined denominator and combined share across the three tiers.
gen total_equity_apportion = total_oifdiva + total_oidvryi + total_equity_adjustment_flag
gen equity_apportion_share = oifdiva_share + oidvryi_share + equity_adjustment_flag_share

*check
egen check2_5 = total(equity_apportion_share), by(hhrhid wave)
tab check2_5,m //all sum to 1 as needed

*create variable
gen pweqini = equity_apportion_share * hweqini

***business assets and debts
*what share of HHs with business assets have business income from owned unincorporated businesses?
codebook bifip // no missing, nothing less than 0
codebook bifin // no missing, nothing less than 0

// Net unincorporated business income and its magnitude (losses count like gains).
gen uninc_bus_income = bifip - bifin
gen abs_uninc_bus_income = abs(uninc_bus_income)
codebook abs_uninc_bus_income // all positive as required
egen total_abs_uninc_bus_income = total(abs_uninc_bus_income), by(hhrhid wave)

tab total_abs_uninc_bus_income if hwbusvi > 0 // 46% of HHs with business assets have no abs_unincorporated_bus_income

*what share of HHs with business assets received dividends from incorporated private businesses?
tab bifdiva if bifdiva < 0 // refused/not stated is 0.07% - negligibly low
replace bifdiva = 0 if bifdiva < 0
egen total_bifdiva = total(bifdiva), by(hhrhid wave)

tab total_bifdiva if hwbusvi > 0 // 92% of HHs with business assets have no bifdiva

*what share of HHs with business assets received income from owned incorporated private businesses
tab bifiga if bifiga < 0 // refused/not stated is 0.28% - negligibly low
replace bifiga = 0 if bifiga < 0
egen total_bifiga = total(bifiga), by(hhrhid wave)

tab total_bifiga if hwbusvi > 0 // 76% of HHs with business assets have no bifiga

*check the combinations
gen total_bifdiva_bifiga = total_bifdiva + total_bifiga
gen total_bifdiva_uninc = total_bifdiva + total_abs_uninc_bus_income
gen total_bifiga_uninc = total_bifiga + total_abs_uninc_bus_income
gen total_all = total_bifdiva + total_bifiga + total_abs_uninc_bus_income

tab total_bifdiva_bifiga if hwbusvi > 0 // 72% have 0 - no overlap would mean 68%
tab total_bifdiva_uninc if hwbusvi > 0 //40% have 0 - no overlap would mean 38% - the most important combination, since both variables reflect equity rather than wage/salary income from working in the business
tab total_bifiga_uninc if hwbusvi > 0 //26% have 0 - no overlap would mean 22%
tab total_all if hwbusvi > 0 //22% have 0 - no overlap would mean 14%
*overall, not much overlap in the variables, which is good.
*approach: use bifdiva and total_abs_uninc_bus_income, averaging the two weights where both are reported; fall back to bifiga when neither is reported, and to the standard rule otherwise.

// Tier 1: equity-type measures. Each share is forced to 0 where the person's
// own amount is 0; the sum is halved when the HH reports both measures.
gen bifdiva_share = cond(bifdiva == 0, 0, bifdiva/total_bifdiva)
gen abs_uninc_bus_income_share = cond(uninc_bus_income == 0, 0, abs_uninc_bus_income/total_abs_uninc_bus_income)
gen bus_equity_share = bifdiva_share + abs_uninc_bus_income_share
replace bus_equity_share = bus_equity_share/2 if total_bifdiva > 0 & total_abs_uninc_bus_income > 0

egen check3_5 = total(bus_equity_share), by(hhrhid wave)
tab check3_5 if total_bifdiva > 0 | total_abs_uninc_bus_income > 0,m // all equals 1 as intended

// Tier 2: bifiga, disabled wherever tier 1 (bifdiva or unincorporated income) is available.
replace total_bifiga = 0 if total_bifdiva > 0 | total_abs_uninc_bus_income > 0
gen bifiga_share = bifiga/total_bifiga
replace bifiga_share = 0 if bifiga == 0 | bifiga_share == .

egen check3_5_1 = total(bifiga_share), by(hhrhid wave)
tab check3_5_1 if total_bifiga > 0,m // all equals 1 as intended

// Tier 3: standard rules when none of the three measures is available.
gen bus_equity_adj_flag = 1 if !inlist(hhrih, 8, 9, 10) & total_bifdiva == 0 & total_abs_uninc_bus_income == 0 & total_bifiga == 0
egen total_bus_equity_adj_flag = total(bus_equity_adj_flag), by(hhrhid wave)
gen bus_equity_adj_flag_share = 1/total_bus_equity_adj_flag
replace bus_equity_adj_flag_share = 0 if bus_equity_adj_flag ==. | bus_equity_adj_flag_share ==.

egen check3_5_3 = total(bus_equity_adj_flag_share), by(hhrhid wave)
tab check3_5_3 if total_bus_equity_adj_flag > 0,m // all equals 1 as intended

// Combined share across the three tiers.
gen bus_equity_apportion_share = bus_equity_share + bifiga_share + bus_equity_adj_flag_share
egen check3_5_2 = total(bus_equity_apportion_share), by(hhrhid wave)
tab check3_5_2,m // all equals 1 as intended

**create variables

gen pwbusvi = bus_equity_apportion_share * hwbusvi
gen pwbusdi = bus_equity_apportion_share * hwbusdi

***other assets and debts

**what share of wealth is held in remaining variables?

// hwobdti is set to zero for every wave-2 record before it enters the sums below.
replace hwobdti = 0 if wave == 2

// Per-wave totals over all person records of the remaining items and of net
// worth (hwnwip - hwnwin), and the former as a percentage of the latter.
egen total_unaccounted_wealth = total(hwtrusi + hwinsui + hwcolli + hwvechi + hwobdti), by(wave)
egen total_wealth = total(hwnwip - hwnwin), by(wave)
gen share_unaccounted = total_unaccounted_wealth*100/total_wealth
tab share_unaccounted wave,m //about 8-9% of wealth not yet assigned each year

**for what share of HHs are these significant components of wealth?

// Same percentage, computed within each household-wave.
egen total_unaccounted_wealth_HH = total(hwtrusi + hwinsui + hwcolli + hwvechi + hwobdti), by(hhrhid wave)
egen total_wealth_HH = total(hwnwip - hwnwin), by(hhrhid wave)
gen share_unaccounted_HH = total_unaccounted_wealth_HH*100/total_wealth_HH
codebook share_unaccounted_HH //over 50% of HH wealth for about 10% of HHs

**hence, assume other assets/debts are held equally by HH members who are not children under 15, dependent students or non-dependent children

**indicator for, and household count of, people who are not children under 15, dependent students or non-dependent children
gen other_asset_holders = !inlist(hhrih, 8, 9, 10)
egen total_other_asset_holders = total(other_asset_holders), by (hhrhid wave)

tab total_other_asset_holders // every household has at least one person to hold the other assets/debts

// Equal split among the eligible holders; everyone else gets 0.
gen other_asset_holders_share = cond(other_asset_holders == 0, 0, 1/total_other_asset_holders)
tab other_asset_holders_share // bounded between 0 and 1, 32% will hold no other assets
tab other_asset_holders //matches the above

*check
egen check4 = total(other_asset_holders_share), by(hhrhid wave)
tab check4,m //sums to 1 in all cases

**create variables
// Person-level amount = eligible-holder share x household amount.

gen pwtrusi = other_asset_holders_share * hwtrusi
gen pwinsui = other_asset_holders_share * hwinsui

gen pwcolli = other_asset_holders_share * hwcolli
gen pwvechi = other_asset_holders_share * hwvechi

gen pwobdti = other_asset_holders_share * hwobdti

***aggregated wealth variables (those in the wealth module)
// Sub-totals are built from person-level components: those created above
// plus person-level variables already present in the data (pwjbani, pwobani,
// pwsupri, pwsupwi, pwjccdi, pwoccdi, pwhecdi, pwothdi).

* bank accounts, superannuation, property values, credit cards, property debt
gen pwtbani = pwjbani + pwobani + pwcbani
gen pwsupei = pwsupri + pwsupwi
gen pwtpvi = pwhmvai + pwopvai
gen pwccdti = pwjccdi + pwoccdi
gen pwtpdi = pwhmdti + pwopdti

* financial and non-financial assets
gen pwfini = pwtbani + pwsupei + pwcaini + pweqini + pwtrusi + pwinsui
gen pwnfii = pwtpvi + pwbusvi + pwcolli + pwvechi

* total assets and total debts
gen pwassei = pwfini + pwnfii
gen pwdebti = pwccdti + pwhecdi + pwothdi + pwbusdi + pwtpdi + pwobdti

****author's note: errors are due to inconsistencies in super and other personal debt variables that carry through

// Keep the identifiers, every person-level component and aggregate, and the
// final share of the primary residence; everything else is discarded.
keep xwaveid wave ///
	pwjbani pwobani pwcbani pwsupri pwsupwi pwcaini pweqini pwtrusi pwinsui ///
	pwhmvai pwopvai pwbusvi pwcolli pwvechi ///
	pwjccdi pwoccdi pwhecdi pwothdi pwbusdi pwhmdti pwopdti pwobdti ///
	pwtbani pwsupei pwtpvi pwccdti pwtpdi pwfini pwnfii pwassei pwdebti ///
	primary_residence_share

save "individual_weight_vars_restricted.dta", replace



